1   package org.apache.lucene.analysis.core;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.io.Reader;
21  import java.io.StringReader;
22  import java.lang.reflect.Modifier;
23  import java.util.Collections;
24  import java.util.HashMap;
25  import java.util.HashSet;
26  import java.util.IdentityHashMap;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Set;
30  
31  import org.apache.lucene.analysis.CachingTokenFilter;
32  import org.apache.lucene.analysis.CharFilter;
33  import org.apache.lucene.analysis.CrankyTokenFilter;
34  import org.apache.lucene.analysis.MockCharFilter;
35  import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
36  import org.apache.lucene.analysis.MockGraphTokenFilter;
37  import org.apache.lucene.analysis.MockHoleInjectingTokenFilter;
38  import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
39  import org.apache.lucene.analysis.MockTokenFilter;
40  import org.apache.lucene.analysis.MockTokenizer;
41  import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
42  import org.apache.lucene.analysis.SimplePayloadFilter;
43  import org.apache.lucene.analysis.TokenFilter;
44  import org.apache.lucene.analysis.TokenStream;
45  import org.apache.lucene.analysis.Tokenizer;
46  import org.apache.lucene.analysis.ValidatingTokenFilter;
47  import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
48  import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
49  import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
50  import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
51  import org.apache.lucene.analysis.snowball.SnowballFilter;
52  import org.apache.lucene.analysis.sr.SerbianNormalizationRegularFilter;
53  import org.apache.lucene.analysis.util.CharFilterFactory;
54  import org.apache.lucene.analysis.util.ResourceLoader;
55  import org.apache.lucene.analysis.util.ResourceLoaderAware;
56  import org.apache.lucene.analysis.util.StringMockResourceLoader;
57  import org.apache.lucene.analysis.util.TokenFilterFactory;
58  import org.apache.lucene.analysis.util.TokenizerFactory;
59  import org.apache.lucene.util.LuceneTestCase;
60  import org.apache.lucene.util.Version;
61  
62  /**
63   * Tests that any newly added Tokenizers/TokenFilters/CharFilters have a
64   * corresponding factory (and that the SPI configuration is correct)
65   */
66  public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
67  
68    // these are test-only components (e.g. test-framework)
69    private static final Set<Class<?>> testComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
70    static {
71      Collections.<Class<?>>addAll(testComponents,
72        MockTokenizer.class,
73        MockCharFilter.class,
74        MockFixedLengthPayloadFilter.class,
75        MockGraphTokenFilter.class,
76        MockHoleInjectingTokenFilter.class,
77        MockRandomLookaheadTokenFilter.class,
78        MockTokenFilter.class,
79        MockVariableLengthPayloadFilter.class,
80        ValidatingTokenFilter.class,
81        CrankyTokenFilter.class,
82        SimplePayloadFilter.class
83      );
84    }
85    
86    // these are 'crazy' components like cachingtokenfilter. does it make sense to add factories for these?
87    private static final Set<Class<?>> crazyComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
88    static {
89      Collections.<Class<?>>addAll(crazyComponents,
90        CachingTokenFilter.class,
91        TeeSinkTokenFilter.class
92      );
93    }
94    
95    // these are oddly-named (either the actual analyzer, or its factory)
96    // they do actually have factories.
97    // TODO: clean this up!
98    private static final Set<Class<?>> oddlyNamedComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
99    static {
100     Collections.<Class<?>>addAll(oddlyNamedComponents,
101       ReversePathHierarchyTokenizer.class, // this is supported via an option to PathHierarchyTokenizer's factory
102       SnowballFilter.class, // this is called SnowballPorterFilterFactory
103       PatternKeywordMarkerFilter.class,
104       SetKeywordMarkerFilter.class,
105       UnicodeWhitespaceTokenizer.class // a supported option via WhitespaceTokenizerFactory
106     );
107   }
108 
109   // The following token filters are excused from having their factory.
110   private static final Set<Class<?>> tokenFiltersWithoutFactory = new HashSet<>();
111   static {
112     tokenFiltersWithoutFactory.add(SerbianNormalizationRegularFilter.class);
113   }
114 
115   private static final ResourceLoader loader = new StringMockResourceLoader("");
116   
117   public void test() throws Exception {
118     List<Class<?>> analysisClasses = TestRandomChains.getClassesForPackage("org.apache.lucene.analysis");
119     
120     for (final Class<?> c : analysisClasses) {
121       final int modifiers = c.getModifiers();
122       if (
123         // don't waste time with abstract classes
124         Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers)
125         || c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface()
126         || testComponents.contains(c)
127         || crazyComponents.contains(c)
128         || oddlyNamedComponents.contains(c)
129         || tokenFiltersWithoutFactory.contains(c)
130         || c.isAnnotationPresent(Deprecated.class) // deprecated ones are typically back compat hacks
131         || !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharFilter.class.isAssignableFrom(c))
132       ) {
133         continue;
134       }
135 
136       Map<String,String> args = new HashMap<>();
137       args.put("luceneMatchVersion", Version.LATEST.toString());
138       
139       if (Tokenizer.class.isAssignableFrom(c)) {
140         String clazzName = c.getSimpleName();
141         assertTrue(clazzName.endsWith("Tokenizer"));
142         String simpleName = clazzName.substring(0, clazzName.length() - 9);
143         assertNotNull(TokenizerFactory.lookupClass(simpleName));
144         TokenizerFactory instance = null;
145         try {
146           instance = TokenizerFactory.forName(simpleName, args);
147           assertNotNull(instance);
148           if (instance instanceof ResourceLoaderAware) {
149             ((ResourceLoaderAware) instance).inform(loader);
150           }
151           assertSame(c, instance.create().getClass());
152         } catch (IllegalArgumentException e) {
153           if (e.getCause() instanceof NoSuchMethodException) {
154             // there is no corresponding ctor available
155             throw e;
156           }
157           // TODO: For now pass because some factories have not yet a default config that always works
158         }
159       } else if (TokenFilter.class.isAssignableFrom(c)) {
160         String clazzName = c.getSimpleName();
161         assertTrue(clazzName.endsWith("Filter"));
162         String simpleName = clazzName.substring(0, clazzName.length() - (clazzName.endsWith("TokenFilter") ? 11 : 6));
163         assertNotNull(TokenFilterFactory.lookupClass(simpleName));
164         TokenFilterFactory instance = null; 
165         try {
166           instance = TokenFilterFactory.forName(simpleName, args);
167           assertNotNull(instance);
168           if (instance instanceof ResourceLoaderAware) {
169             ((ResourceLoaderAware) instance).inform(loader);
170           }
171           Class<? extends TokenStream> createdClazz = instance.create(new KeywordTokenizer()).getClass();
172           // only check instance if factory have wrapped at all!
173           if (KeywordTokenizer.class != createdClazz) {
174             assertSame(c, createdClazz);
175           }
176         } catch (IllegalArgumentException e) {
177           if (e.getCause() instanceof NoSuchMethodException) {
178             // there is no corresponding ctor available
179             throw e;
180           }
181           // TODO: For now pass because some factories have not yet a default config that always works
182         }
183       } else if (CharFilter.class.isAssignableFrom(c)) {
184         String clazzName = c.getSimpleName();
185         assertTrue(clazzName.endsWith("CharFilter"));
186         String simpleName = clazzName.substring(0, clazzName.length() - 10);
187         assertNotNull(CharFilterFactory.lookupClass(simpleName));
188         CharFilterFactory instance = null;
189         try {
190           instance = CharFilterFactory.forName(simpleName, args);
191           assertNotNull(instance);
192           if (instance instanceof ResourceLoaderAware) {
193             ((ResourceLoaderAware) instance).inform(loader);
194           }
195           Class<? extends Reader> createdClazz = instance.create(new StringReader("")).getClass();
196           // only check instance if factory have wrapped at all!
197           if (StringReader.class != createdClazz) {
198             assertSame(c, createdClazz);
199           }
200         } catch (IllegalArgumentException e) {
201           if (e.getCause() instanceof NoSuchMethodException) {
202             // there is no corresponding ctor available
203             throw e;
204           }
205           // TODO: For now pass because some factories have not yet a default config that always works
206         }
207       }
208     }
209   }
210 }